Paquetes utilizados:

library(readr)
library(DT)
library(nortest)
library(ggplot2)
library(nortest)
library(dplyr)
library(RColorBrewer)
library(gridExtra)
library(readxl)

1 Genes codificantes sin AS y con AS

1.1 Tabla de genes codificantes sin AS

# Interactive table of the coding genes that have no antisense (AS) partner.
DT::datatable(data = coding_filtro)

1.2 Tabla de genes codificantes con AS

# Interactive table of the coding genes that do have an antisense (AS) partner.
DT::datatable(data = tabla_final_genes)

1.3 Comparación de tamaño.

# Horizontal bar plot of the median gene size (bp): coding genes with an
# antisense partner ("Si") versus coding genes without one ("No").
# The title and axis label previously contained mis-encoded characters.
barplot(
  c(median(tabla_final_genes$size), median(coding_filtro$size)),
  names.arg = c("Si", "No"),
  main = "Mediana tamaño",
  xlab = "Tamaño (pb)",
  legend.text = c("Si", "No"),
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8
)

Resumen datos Genes con AS

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     595   22324   63394  157223  171915 2304996
# Lilliefors normality test on the size of coding genes with an AS partner.
nortest::lillie.test(x = tabla_final_genes$size)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  tabla_final_genes$size
## D = 0.26838, p-value < 2.2e-16

Resumen datos Genes sin AS

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      38    9232   25956   61483   66092 2473538

Test de normalidad:

# Lilliefors normality test on the size of coding genes without an AS partner.
nortest::lillie.test(x = coding_filtro$size)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  coding_filtro$size
## D = 0.29572, p-value < 2.2e-16

Test comparativo de distribuciones no paramétricas

# Two-sample Wilcoxon rank-sum test: gene size without AS vs. with AS.
stats::wilcox.test(x = coding_filtro$size, y = tabla_final_genes$size)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  coding_filtro$size and tabla_final_genes$size
## W = 9457736, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

1.4 Comparación de transcritos alternativos.

# Tabulate column V7 to count the number of transcripts per entry.
proporcion_no_AS <- table(total_gene_no_AS$V7)
# as.data.frame() on a table already returns the Var1/Freq data frame;
# wrapping it in a single-argument rbind() was a no-op and has been dropped.
transcript_no_AS <- as.data.frame(proporcion_no_AS)

# Summary statistics of the per-entry transcript counts.
summary(transcript_no_AS$Freq)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    1.000    3.000    6.000    8.362   11.000 1211.000
# Lilliefors normality test on transcript counts of genes without AS.
nortest::lillie.test(x = transcript_no_AS$Freq)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  transcript_no_AS$Freq
## D = 0.27933, p-value < 2.2e-16
# Lilliefors normality test on transcript counts of genes with AS.
nortest::lillie.test(x = tabla_final_genes$number_trans)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  tabla_final_genes$number_trans
## D = 0.19158, p-value < 2.2e-16
# Two-sample Wilcoxon rank-sum test: transcript counts without AS vs. with AS.
stats::wilcox.test(
  x = transcript_no_AS$Freq,
  y = tabla_final_genes$number_trans
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  transcript_no_AS$Freq and tabla_final_genes$number_trans
## W = 10948440, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

1.5 Distribución por cromosoma.

# Relative frequency of genes with an AS partner on each chromosome.
proporciones <- table(tabla_final_genes$chr)
proporciones_data_frame <- as.data.frame(proporciones)
# Declare the chromosome levels in karyotype order (1..22, X, Y).
proporciones_data_frame$Var1 <- factor(
  proporciones_data_frame$Var1,
  levels = c(1:22, "X", "Y")
)

# Total number of genes counted, used as the normalising denominator.
summa <- sum(proporciones_data_frame$Freq)

# Vectorised normalisation. The previous loop ran over a hard-coded 1:24,
# which produced NA rows when fewer chromosomes were present and left raw
# counts in any row beyond the 24th.
nuevo_data_frame <- proporciones_data_frame
nuevo_data_frame$Freq <- round(proporciones_data_frame$Freq / summa, 4)
colnames(nuevo_data_frame) <- c("Chr_AS", "Freq_ajustada")
datatable(nuevo_data_frame)
# Relative frequency of genes without an AS partner on each chromosome.
variable <- table(coding_filtro$chr)
df <- as.data.frame(variable)
summa1 <- sum(df$Freq)
# Vectorised normalisation over however many chromosomes the table holds
# (the previous loop assumed exactly 24 rows).
df$Freq <- round(df$Freq / summa1, 4)
datatable(df)
# Two side-by-side bar plots of the number of genes per chromosome: genes with
# an AS partner (left) and genes without one (right). The two titles read as a
# single caption across the panels.
# par() returns the previous settings so the two-panel layout can be undone
# afterwards instead of leaking into every later plot.
old_par <- par(mfrow = c(1, 2))
variable_genes_con_AS <- table(tabla_final_genes$chr)
barplot_chr_AS <- barplot(
  variable_genes_con_AS[c(1:22, "X", "Y")],
  font.axis = 2,
  las = 2,
  col = c("lightcoral"),
  ylab = "Frecuencia",
  main = "Número de genes",
  legend.text = "AS"
)
variable_genes_sin_AS <- table(coding_filtro$chr)
barplot_chr_genes <- barplot(
  variable_genes_sin_AS[c(1:22, "X", "Y")],
  font.axis = 2,
  las = 2,
  main = "por cromosoma",
  col = c("lightblue"),
  ylab = "Frecuencia",
  legend.text = "Sin AS"
)
par(old_par)

1.6 Secuencias Repetidas

1.6.1 Referencia

# Bar plot of how often each repeat family occurs (column family_repeat).
total_repeat <- table(total_Repeat$family_repeat)
barplot(
  height = total_repeat,
  main = "Frecuencia Aparición Secuencias Repetidas",
  ylab = "Frecuencia",
  col = c("lightcoral"),
  las = 2,
  font.axis = 2,
  cex.names = 0.65,
  cex.axis = 0.6
)

# Same plot by repeat type (column type_repeat). `total_repeat` is overwritten
# here, so code that reads it afterwards sees the per-type counts.
total_repeat <- table(total_Repeat$type_repeat)
barplot(
  height = total_repeat,
  main = "Frecuencia Aparición Secuencias Repetidas",
  ylab = "Frecuencia",
  col = c("lightcoral"),
  las = 2,
  font.axis = 2,
  cex.names = 0.65,
  cex.axis = 0.6
)

# Relative frequency of each repeat type in the reference set.
total_repeat_table <- as.data.frame(total_repeat)
suma_repe <- sum(total_repeat_table$Freq)

# Build the proportion table in one vectorised step. The previous loop filled
# an empty data frame cell by cell over a hard-coded 1:13, so any table with a
# different number of repeat types was padded with NA or silently truncated.
# Column names V1/V2 match what the cell-wise assignment used to produce.
df <- data.frame(
  V1 = total_repeat_table[[1]],
  V2 = round(total_repeat_table$Freq / suma_repe, 7)
)
datatable(df)

1.6.2 Codificantes con AS

# Counts of each repeat type among the repeats overlapping coding genes with AS.
gene_prot_coding_repeat <- table(intersect_gene_repeat$type_repeat)
barplot(
  height = gene_prot_coding_repeat,
  main = "Frecuencia Aparición Secuencias Repetidas En Genes Codificantes con AS",
  ylab = "Frecuencia",
  col = c("lightcoral"),
  las = 2,
  font.axis = 2,
  cex.names = 0.65,
  cex.axis = 0.6,
  cex.main = 1
)

# Relative frequency of each repeat type overlapping coding genes with AS.
repeat_gene_prot_coding <- as.data.frame(gene_prot_coding_repeat)
suma_repe <- sum(repeat_gene_prot_coding$Freq)

# Vectorised replacement for the former `for (i in 1:13)` loop, which assumed
# exactly 13 repeat types and grew the data frame one cell at a time.
# Column names V1/V2 match what the cell-wise assignment used to produce.
df <- data.frame(
  V1 = repeat_gene_prot_coding[[1]],
  V2 = round(repeat_gene_prot_coding$Freq / suma_repe, 7)
)
datatable(df)

# Number of times each gene name appears in the gene/repeat intersection.
repeat_gene <- table(intersect_gene_repeat$name_gen)
repeat_gene_data_frame <- as.data.frame(repeat_gene)
datatable(repeat_gene_data_frame)

1.7 Codificantes sin AS

# Repeat counts by family for coding genes without AS.
prot_coding_repeat <- table(intersect_total_gene$family)
barplot(
  height = prot_coding_repeat,
  main = "Frecuencia Aparición Secuencias Repetidas En Genes Codificantes sin AS",
  ylab = "Frecuencia",
  col = c("lightblue"),
  las = 2,
  font.axis = 2,
  cex.names = 0.65,
  cex.axis = 0.6,
  cex.main = 1
)

# Repeat counts by type for the same set of genes.
coding_total <- table(intersect_total_gene$type_repeat)
barplot(
  height = coding_total,
  main = "Frecuencia Aparición Secuencias Repetidas En Genes Codificantes sin AS",
  ylab = "Frecuencia",
  col = c("lightblue"),
  las = 2,
  font.axis = 2,
  cex.names = 0.65,
  cex.axis = 0.6,
  cex.main = 1
)

# Relative frequency of each repeat type overlapping coding genes without AS.
repeat_gene_data_frame_1 <- as.data.frame(coding_total)
suma_repe <- sum(repeat_gene_data_frame_1$Freq)

# Vectorised replacement for the former `for (i in 1:13)` loop, which assumed
# exactly 13 repeat types and grew the data frame one cell at a time.
# Column names V1/V2 match what the cell-wise assignment used to produce.
df <- data.frame(
  V1 = repeat_gene_data_frame_1[[1]],
  V2 = round(repeat_gene_data_frame_1$Freq / suma_repe, 7)
)
datatable(df)

2 Gen antisentido y Gen Sentido.

2.1 Tabla de genes AS

# Interactive table of the antisense (AS) genes.
DT::datatable(data = tabla_final_AS)

2.2 Tabla conjunta.

# Left-join the AS table with the sense-gene table on the gene name
# (X8 on the left, X6 on the right); a left join keeps every AS row so that
# rows lacking a partner can be spotted.
nuevo_out <- primer_filtrado %>%
  dplyr::left_join(prot_coding_comparar_1556, by = c("X8" = "X6"))

# Strip the "chr" text from the sense-gene chromosome column so both
# chromosome columns share the same format.
nuevo_out$X1.y <- gsub("chr", "", nuevo_out$X1.y)

# Keep every column except the 6th.
nuevo_out <- nuevo_out[, c(1:5, 7:13)]

datatable(
  nuevo_out,
  colnames = c(
    "Id_ens_AS", "chr_AS", "Pos_init", "Pos_fin", "Strand", "AS",
    "Gene", "Chr_gene", "Pos_init_gen", "Pos_fin_gen", "Strand_gene", "ID_gene"
  )
)

2.3 Comparación Tamaño.

# Horizontal bar plot of the median size (bp) of antisense genes ("AS")
# versus their sense genes ("S"). The labels previously contained
# mis-encoded characters.
barplot(
  c(median(tabla_final_AS$size), median(tabla_final_genes$size)),
  names.arg = c("AS", "S"),
  xlab = "Tamaño (pb)",
  main = "Mediana tamaño",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8
)

#legend("bottomright", legend = c("Antisentido","Sentido"),fill=c("lightcoral","lightblue"))

# Relabel the gene type: "GEN" is shown as "S" (sense), "AS" stays "AS".
tabla_conjunta$type <- factor(
  tabla_conjunta$type,
  levels = c("GEN", "AS"),
  labels = c("S", "AS")
)

# Horizontal box plot of the size distribution per gene type.
p <- ggplot(tabla_conjunta, aes(x = type, y = size, fill = type)) +
  geom_boxplot(outlier.size = 1, outlier.shape = 5) +
  scale_y_continuous(name = "Tamaño (pb)") +
  scale_x_discrete(name = " ") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  coord_flip() +
  labs(title = "Distribución de Tamaños") +
  theme(legend.position = "none")
p

Resumen datos AS

# Summary statistics of antisense gene sizes.
summary(object = tabla_final_AS$size)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     330    3410   10380   38184   34290 1064401
# Lilliefors normality test on antisense gene sizes.
nortest::lillie.test(x = tabla_final_AS$size)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  tabla_final_AS$size
## D = 0.32095, p-value < 2.2e-16

Resumen datos gen

# Summary statistics of the sizes of sense genes that have an AS partner.
summary(object = tabla_final_genes$size)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     595   22324   63394  157223  171915 2304996
# Lilliefors normality test on the sense-gene sizes (tabla_final_genes).
nortest::lillie.test(x = tabla_final_genes$size)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  tabla_final_genes$size
## D = 0.26838, p-value < 2.2e-16
# Non-parametric comparison (Wilcoxon rank-sum) of antisense vs. sense gene
# sizes; the result is stored and then printed.
wilcoxon <- stats::wilcox.test(
  x = tabla_final_AS$size,
  y = tabla_final_genes$size
)
wilcoxon
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  tabla_final_AS$size and tabla_final_genes$size
## W = 573754, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

2.4 Comparación de transcritos alternativos

# Horizontal bar plot of the median number of transcripts per gene:
# antisense genes ("AS") versus sense genes ("GEN"). The labels previously
# contained mis-encoded characters.
barplot(
  c(median(tabla_final_AS$number_trans), median(tabla_final_genes$number_trans)),
  names.arg = c("AS", "GEN"),
  main = "Mediana Número Tránscritos",
  xlab = "Número de Tránscritos",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8
)

Resumen Transcritos AS

# Summary statistics of the transcript count per antisense gene.
summary(object = tabla_final_AS$number_trans)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    1.00    2.00    4.73    5.00  283.00

Resumen Transcritos Genes con AS

# Summary statistics of the transcript count per sense gene with AS.
summary(object = tabla_final_genes$number_trans)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    4.00    8.00   11.14   14.00  192.00
# Lilliefors normality test on the antisense transcript counts.
nortest::lillie.test(x = tabla_final_AS$number_trans)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  tabla_final_AS$number_trans
## D = 0.35695, p-value < 2.2e-16
# Lilliefors normality test on the sense-gene transcript counts.
nortest::lillie.test(x = tabla_final_genes$number_trans)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  tabla_final_genes$number_trans
## D = 0.19158, p-value < 2.2e-16
# Two-sample Wilcoxon rank-sum test: transcript counts, antisense vs. sense.
stats::wilcox.test(
  x = tabla_final_AS$number_trans,
  y = tabla_final_genes$number_trans
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  tabla_final_AS$number_trans and tabla_final_genes$number_trans
## W = 515374, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

2.5 Intersect y Coverage

# Round the coverage values (column value3) to one decimal so that they can
# be grouped, then count how many rows fall on each rounded value.
frecuencia <- table(round(coverage_AS_gen_encabezado$value3, 1))
# as.data.frame() already yields the Var1/Freq table; the single-argument
# rbind() around it was a no-op and has been dropped.
frecuencia_tabla <- as.data.frame(frecuencia)
datatable(frecuencia_tabla)

# Horizontal bar plot: degree of overlap (y) against its frequency (x).
barplot(
  frecuencia,
  main = "Solapamiento AS - GEN",
  xlab = "Frecuencia",
  xlim = c(0, 800),
  ylab = "Grado de solapamiento",
  col = c("lightcoral"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8
)

# Number of rows per sense-gene name in the AS/gene intersection, i.e. how
# many times an antisense overlaps each sense gene.
genes <- table(intersect_AS_gen_encabezado$gene_name)
# The single-argument rbind() wrapper was a no-op and has been dropped.
porcentaje_tabla <- as.data.frame(genes)
datatable(porcentaje_tabla)

2.6 AS

# Counts of each repeat type among the repeats overlapping antisense genes.
repeat_AS <- table(intersect_AS_repeat$type_repeat)
barplot(
  repeat_AS,
  font.axis = 2,
  las = 2,
  main = "Frecuencia Aparición Secuencias Repetidas Solapadas con AS",
  col = c("lightcoral"),
  ylab = "Frecuencia",
  cex.names = 0.9,
  cex.axis = 0.8
)

# Relative frequency of each repeat type. Computed in one vectorised step;
# the former `for (i in 1:13)` loop assumed exactly 13 repeat types and grew
# the data frame one cell at a time.
# Column names V1/V2 match what the cell-wise assignment used to produce.
prop_repeat_AS <- as.data.frame(repeat_AS)
suma_repe <- sum(prop_repeat_AS$Freq)
df <- data.frame(
  V1 = prop_repeat_AS[[1]],
  V2 = round(prop_repeat_AS$Freq / suma_repe, 4)
)
datatable(df)

# Number of times each antisense gene overlaps a repeated sequence.
# Note that `repeat_AS` is reused here for the per-gene counts.
repeat_AS <- table(intersect_AS_repeat$name_gen)
repeat_AS_data_frame <- as.data.frame(repeat_AS)
datatable(repeat_AS_data_frame)

3 Expresión en hígados humanos.

# Identify antisense (AS) genes expressed in liver whose sense gene is not
# expressed, and list their isoforms.

# Drop everything from the first dot onwards in the gene IDs.
OUT_Prot_coding$associated_gene <- gsub('\\..*$', '',OUT_Prot_coding$associated_gene)
OUT_AS$associated_gene <- gsub('\\..*$', '',OUT_AS$associated_gene) 

# Count rows per AS gene ID and attach the value in column X7 of Nombre_AS_Gen.
table_out_AS <- table(OUT_AS$associated_gene)
df_AS_total <- rbind(as.data.frame(table_out_AS))
ID_AS <- Nombre_AS_Gen[,c(1,7)]
# Keep one row per distinct X7 value so isoforms are not repeated.
t1 <- ID_AS %>% distinct(X7, .keep_all = TRUE)
ID_del_AS_con_GEN <- Nombre_AS_Gen[,c(1,8)]
t_id_AS_gene <- ID_del_AS_con_GEN %>% distinct(X8, .keep_all = TRUE)
id_nombre <-dplyr::inner_join(df_AS_total,t1,by=c("Var1" = "X1"))
genes_AS_e_id <- id_nombre[,c(1,3)]
# Join the tables that match on the antisense gene ID.
isoforma_id_nombre <-dplyr::inner_join(ID_del_AS_con_GEN,genes_AS_e_id,by=c("X1" = "Var1"))
# NOTE(review): this repeats the substitution already applied at the top of
# the block; it looks redundant -- confirm before removing.
OUT_Prot_coding$associated_gene <- gsub('\\..*$', '',OUT_Prot_coding$associated_gene)
table_out_gene <- table(OUT_Prot_coding$associated_gene)
df_gene_total <- rbind(as.data.frame(table_out_gene))
# Select the sense-gene ID together with its name.
ID_gene <- prot_coding_comparar_1556[,c(5,6)]
# Guard against repeated IDs.
t2<- ID_gene %>% distinct(X5, .keep_all = TRUE)
# Identify the sense genes that are expressed.
id_nombre <-dplyr::inner_join(df_gene_total,t2,by=c("Var1" = "X5"))
id_nombre_gen <- id_nombre[,c(1,3)]
isoforma_y_gen <- OUT_AS[,c(1,7)]
# Left join keeps every expressed AS gene; an empty cell in the joined columns
# means the corresponding sense gene is not expressed.
AS_no_Sentido <- dplyr::left_join(isoforma_id_nombre,id_nombre_gen,by=c("X8"="X6"))
# NOTE(review): the rows are picked by hard-coded positions, presumably the
# rows with an empty sense-gene cell for one specific input; this will select
# the wrong rows if the input changes -- consider filtering on the missing
# value instead, after confirming it yields the same rows.
id_AS_name_no_expr_sent <- AS_no_Sentido[c(2,3,7,17,20,21,26,28,29,32,33,37,42,47,48,53,55,56,63,64,67,70,75,77,79,85,86,97,104,107,108,112,114,116,124,128,130,140,142,146,147),c(1,3,2)]
datatable(id_AS_name_no_expr_sent)
AS_matriz <- exprMatrix_AS
# Remove the associated-gene suffix (everything from the first underscore)
# from each isoform ID of the expression matrix.
AS_matriz$ids <- gsub('_.*$', '',AS_matriz$ids)
# Relate each isoform to the table that links the gene ID to column X7.
isoforma_id_nombre <-dplyr::inner_join(isoforma_y_gen,ID_AS,by=c("associated_gene" = "X1"))
id_gen_noAS_noexprx <- id_AS_name_no_expr_sent[,c(1,3)]
isoforma_id_nombre <-dplyr::inner_join(isoforma_id_nombre,id_gen_noAS_noexprx,by=c("associated_gene" = "X1"))

uniq_matriz_AS_no_gene_id <- isoforma_id_nombre %>% distinct(isoform, .keep_all = TRUE)
# Antisense genes whose sense gene is not expressed.
datatable(uniq_matriz_AS_no_gene_id)

3.1 Genes AS

# Expression matrix of the antisense transcripts: first column is the ID,
# the remaining 15 columns are renamed to the sample labels below.
Matriz_AS <- exprMatrix_AS
colnames(Matriz_AS) <- c(
  "ID", "OM001", "OM010", "OM012", "OM015", "OM019", "OM025", "OM028",
  "OM029", "OM032", "OM04", "OM05", "OM06", "OM009", "OM33", "OM49"
)
heatmap_AS <- Matriz_AS
heatmap_matrix_AS <- as.matrix(heatmap_AS[, 2:16])
rownames(heatmap_matrix_AS) <- exprMatrix_AS$ids

# Heat map without row/column reordering (Rowv/Colv = NA).
heatmap(
  heatmap_matrix_AS,
  Rowv = NA,
  Colv = NA,
  col = colorRampPalette(brewer.pal(5, "YlOrRd"))(4),
  cexRow = 0.01,
  cexCol = 0.7
)

# Per-sample box plot. With horizontal = TRUE the values run along the x axis,
# so the "TPM" label belongs on xlab (it was on ylab, labelling the samples).
boxplot(
  Matriz_AS[, 2:16],
  cex.axis = 0.7,
  las = 2,
  col = 1:7,
  xlab = "TPM",
  horizontal = TRUE
)

A continuación se genera un gráfico de la distribución de la expresión de los genes antisentido por paciente. Se realiza un recuento de la expresión redondeada sin decimales para agrupar por número entero y, al final, se representan todos los pacientes en un único gráfico.

# Build the expression-distribution plot for one sample.
#
# expr:           data frame with one numeric expression column per sample.
# sample_id:      name of the column to plot; also used as the x-axis title.
# axis_text_size: font size of the axis tick labels.
# Returns a ggplot object: bars (plus a connecting line) showing how many rows
# fall on each expression value after rounding to the nearest integer.
plot_expression_counts <- function(expr, sample_id, axis_text_size = 6) {
  counts <- as.data.frame(table(round(expr[[sample_id]], 0)))
  ggplot(counts, aes(x = Var1, y = Freq, fill = Freq)) +
    geom_col() +
    geom_line(group = 1) +
    theme_minimal() +
    theme(legend.position = "none") +
    labs(x = sample_id) +
    theme(
      axis.title.x = element_text(
        face = "bold", vjust = -0.5, colour = "orange", size = rel(1)
      ),
      axis.title = element_text(size = 10, face = "bold")
    ) +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1),
      axis.text = element_text(size = axis_text_size)
    )
}

# Samples in the order they are laid out on the grid.
AS_sample_ids <- c(
  "OM001", "OM010", "OM012", "OM015", "OM019", "OM025", "OM028", "OM029",
  "OM032", "OM04", "OM05", "OM06", "OM009", "OM33", "OM49"
)

# One plot per sample, replacing fifteen copy-pasted blocks. Individual plots
# remain reachable by name, e.g. AS_sample_plots[["OM001"]].
AS_sample_plots <- lapply(AS_sample_ids, function(sample_id) {
  plot_expression_counts(Matriz_AS, sample_id)
})
names(AS_sample_plots) <- AS_sample_ids

grid.arrange(grobs = AS_sample_plots, ncol = 5)

# Expression matrix with isoform IDs trimmed at the first underscore.
AS_matriz <- exprMatrix_AS
AS_matriz$ids <- gsub("_.*$", "", AS_matriz$ids)

# Isoform / associated-gene pairs (columns 1 and 7 of OUT_AS), with the gene
# ID trimmed at the first dot.
isoforma_y_gen_AS <- OUT_AS[, c(1, 7)]
isoforma_y_gen_AS$associated_gene <- gsub(
  "\\..*$", "", isoforma_y_gen_AS$associated_gene
)

# Attach the ID_AS columns to each isoform, matching on the gene ID.
isoforma_id_nombre_AS <- isoforma_y_gen_AS %>%
  dplyr::inner_join(ID_AS, by = c("associated_gene" = "X1"))

# Attach the per-sample expression values, matching on the isoform ID.
matrix_nombre_id_AS <- isoforma_id_nombre_AS %>%
  dplyr::inner_join(AS_matriz, by = c("isoform" = "ids"))

Expresión media

# Mean expression of every antisense isoform across the samples
# (columns 4 to 18 of the joined table).
para_media <- matrix_nombre_id_AS
# rowMeans() replaces apply(..., 1, mean): apply() is not meant for data
# frames (it coerces to a matrix and then loops row by row in R).
AS_TpmRNA_mean <- rowMeans(para_media[, 4:18])
out_cambiado_prot_coding <- cbind(para_media, AS_TpmRNA_mean)
# Keep the three leading columns plus the mean appended as column 19.
isoforma_y_media_AS <- out_cambiado_prot_coding[, c(1, 2, 3, 19)]
datatable(isoforma_y_media_AS)

Siguiendo el modelo anterior, se realizan los mismos pasos para agrupar la expresión media de cada isoforma según el número entero de su expresión en TPM.

# Count the antisense isoforms per mean-expression value, rounded to the
# nearest integer.
Isoform <- table(round(isoforma_y_media_AS$AS_TpmRNA_mean, 0))
# The single-argument rbind() wrapper was a no-op and has been dropped.
isoform_df <- as.data.frame(Isoform)

# Bar chart of those counts with a connecting line drawn on top.
isoform_plot <- ggplot(isoform_df, aes(x = Var1, y = Freq, fill = Freq)) +
  geom_col() +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(x = "Expresión TPM") +
  theme(
    axis.title.x = element_text(
      face = "bold", vjust = -0.5, colour = "orange", size = rel(1)
    ),
    axis.title = element_text(size = 10, face = "bold")
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 6)
  ) +
  geom_line(group = 1)
isoform_plot

# Attach the mean expression to the AS isoforms whose sense gene is not
# expressed, keeping one row per isoform, and count isoforms per X7 value.
isoform_mean_AS <- isoforma_y_media_AS[, c(1, 4)]
id_nombre <- dplyr::inner_join(
  uniq_matriz_AS_no_gene_id, isoform_mean_AS,
  by = "isoform"
)
uniq_AS_no_expr_gen_name_id <- id_nombre %>%
  distinct(isoform, .keep_all = TRUE)
genes_AS_no_S <- table(uniq_AS_no_expr_gen_name_id$X7)

# Trim the sense-gene IDs at the first dot.
OUT_Prot_coding$associated_gene <- gsub(
  "\\..*$", "", OUT_Prot_coding$associated_gene
)

# Count rows per expressed sense gene and attach its name (X6) via its ID (X5).
# The single-argument rbind() around as.data.frame() was a no-op.
table_out_gene <- table(OUT_Prot_coding$associated_gene)
df_gene_total <- as.data.frame(table_out_gene)
ID_gene <- prot_coding_comparar_1556[, c(5, 6)]
t2 <- ID_gene %>% distinct(X5, .keep_all = TRUE)
id_nombre <- dplyr::inner_join(df_gene_total, t2, by = c("Var1" = "X5"))
id_nombre_gen <- id_nombre[, c(1, 3)]

# Add column X8 of t_id_AS_gene to every AS isoform, matching on the AS gene ID.
media_expresion_mas_gen <- dplyr::inner_join(
  isoforma_y_media_AS, t_id_AS_gene,
  by = c("associated_gene" = "X1")
)

3.2 Genes S

Para los genes sentido se sigue el mismo modelo anterior.

# Expression matrix of the sense (protein-coding) transcripts: first column
# is the ID, the remaining 15 columns are renamed to the sample labels.
OUT_prueba <- exprMatrix_Prot_coding
colnames(OUT_prueba) <- c(
  "ID", "OM001", "OM010", "OM012", "OM015", "OM019", "OM025", "OM028",
  "OM029", "OM032", "OM04", "OM05", "OM06", "OM009", "OM33", "OM49"
)

# The heatmap_* names from the antisense section are reused here.
heatmap_AS <- OUT_prueba
heatmap_matrix_AS <- as.matrix(heatmap_AS[, 2:16])
rownames(heatmap_matrix_AS) <- exprMatrix_Prot_coding$ids

# Heat map without row/column reordering (Rowv/Colv = NA).
heatmap(
  heatmap_matrix_AS,
  Rowv = NA,
  Colv = NA,
  col = colorRampPalette(brewer.pal(5, "YlOrRd"))(4),
  cexRow = 0.001
)

# Build the expression-distribution plot for one sample.
#
# expr:           data frame with one numeric expression column per sample.
# sample_id:      name of the column to plot; also used as the x-axis title.
# axis_text_size: font size of the axis tick labels.
# Returns a ggplot object: bars (plus a connecting line) showing how many rows
# fall on each expression value after rounding to the nearest integer.
plot_expression_counts <- function(expr, sample_id, axis_text_size = 6) {
  counts <- as.data.frame(table(round(expr[[sample_id]], 0)))
  ggplot(counts, aes(x = Var1, y = Freq, fill = Freq)) +
    geom_col() +
    geom_line(group = 1) +
    theme_minimal() +
    theme(legend.position = "none") +
    labs(x = sample_id) +
    theme(
      axis.title.x = element_text(
        face = "bold", vjust = -0.5, colour = "orange", size = rel(1)
      ),
      axis.title = element_text(size = 10, face = "bold")
    ) +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1),
      axis.text = element_text(size = axis_text_size)
    )
}

# Samples in the order they are laid out on the grid.
S_sample_ids <- c(
  "OM001", "OM010", "OM012", "OM015", "OM019", "OM025", "OM028", "OM029",
  "OM032", "OM04", "OM05", "OM06", "OM009", "OM33", "OM49"
)

# One plot per sample for the sense genes. Every panel now uses tick-label
# size 2; previously OM49 alone used size 6, a copy-paste leftover that made
# the last panel inconsistent with the other fourteen.
S_sample_plots <- lapply(S_sample_ids, function(sample_id) {
  plot_expression_counts(OUT_prueba, sample_id, axis_text_size = 2)
})
names(S_sample_plots) <- S_sample_ids

grid.arrange(grobs = S_sample_plots, ncol = 5)

# Expression matrix of sense transcripts with isoform IDs trimmed at the
# first underscore.
gene_matriz <- exprMatrix_Prot_coding
gene_matriz$ids <- gsub("_.*$", "", gene_matriz$ids)

# Isoform / associated-gene pairs (columns 1 and 7), gene ID trimmed at the
# first dot, then joined to the gene names and to the expression values.
isoforma_y_gen <- OUT_Prot_coding[, c(1, 7)]
isoforma_y_gen$associated_gene <- gsub(
  "\\..*$", "", isoforma_y_gen$associated_gene
)
isoforma_id_gen <- dplyr::inner_join(
  isoforma_y_gen, id_nombre_gen,
  by = c("associated_gene" = "Var1")
)
matrix_nombre_id <- dplyr::inner_join(
  isoforma_id_gen, gene_matriz,
  by = c("isoform" = "ids")
)

# Mean expression per isoform across the samples (columns 4 to 18).
# rowMeans() replaces apply(..., 1, mean), which is not meant for data frames.
para_media <- matrix_nombre_id
AS_TpmRNA_mean <- rowMeans(para_media[, 4:18])
out_cambiado_prot_coding <- cbind(para_media, AS_TpmRNA_mean)
isoforma_y_media_gen <- out_cambiado_prot_coding[, c(1, 2, 3, 19)]

# One row per isoform.
t7 <- isoforma_y_media_gen %>% distinct(isoform, .keep_all = TRUE)
datatable(t7)

# Count isoforms per mean-expression value, rounded to the nearest integer.
# The single-argument rbind() around as.data.frame() was a no-op.
Isoform <- table(round(t7$AS_TpmRNA_mean, 0))
isoform_df <- as.data.frame(Isoform)
isoform_plot <- ggplot(isoform_df, aes(x = Var1, y = Freq, fill = Freq)) +
  geom_col() +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(x = "Expresión TPM") +
  theme(
    axis.title.x = element_text(
      face = "bold", vjust = -0.5, colour = "orange", size = rel(1)
    ),
    axis.title = element_text(size = 10, face = "bold")
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 6)
  ) +
  geom_line(group = 1)
isoform_plot

# Columns 5 and 3 of the AS mean-expression table (X8 is used as join key below).
AS_gen <- media_expresion_mas_gen[, c(5, 3)]
# Rows per X6 value among the expressed sense isoforms; the single-argument
# rbind() around as.data.frame() was a no-op and has been dropped.
df_genes <- as.data.frame(table(t7$X6))

# One row per sense gene, left-joined to the AS table on the gene name;
# rows without a match keep empty AS columns.
gen_id <- t7[, c(2, 3)]
t18 <- gen_id %>% distinct(associated_gene, .keep_all = TRUE)
gen_sin_AS <- dplyr::left_join(t18, AS_gen, by = c("X6" = "X8"))
# The join can duplicate genes, so reduce to one row per gene again.
t28 <- gen_sin_AS %>% distinct(associated_gene, .keep_all = TRUE)
datatable(t28)

4 Genes S y AS comparados

# Pair each antisense isoform row with the sense isoform rows that share the
# same gene name (X8 on the AS side, X6 on the sense side).
media_expresion_mas_gen_AS <- media_expresion_mas_gen %>%
  dplyr::inner_join(isoforma_y_media_gen, by = c("X8" = "X6"))
DT::datatable(data = media_expresion_mas_gen_AS)

4.1 Genes sin AS.

# Read the spreadsheet of genes without AS expression. It is read without a
# header row, so readxl names the first column "...1".
Genes_sin_expresion_AS <- read_excel(
  "~/Desktop/tablas_expresion/Genes_sin_expresion_AS.xlsx",
  col_names = FALSE
)

# Keep the sense isoforms whose gene ID appears in that spreadsheet.
genes_sin_AS <- t7 %>%
  dplyr::inner_join(
    Genes_sin_expresion_AS,
    by = c("associated_gene" = "...1")
  )
Genes_sin_AS_S <- genes_sin_AS[, 1:4]

# One row per isoform.
t100 <- Genes_sin_AS_S %>%
  distinct(isoform, .keep_all = TRUE)
DT::datatable(data = t100)

## Genes S y AS.

# The join that built media_expresion_mas_gen_AS can repeat rows in order to
# satisfy every match, so each side is reduced to unique isoforms separately.
# Unique antisense isoforms (isoform.x):
solo_AS_con_S <- media_expresion_mas_gen_AS %>%
  distinct(isoform.x, .keep_all = TRUE)
# Unique sense isoforms (isoform.y):
solo_S_con_AS <- media_expresion_mas_gen_AS %>%
  distinct(isoform.y, .keep_all = TRUE)

4.1.1 Expresión

# Mean TPM of each group: genes without AS (`t100`), AS isoforms that have an
# S partner, and S isoforms that have an AS partner.
TPM_mean_GEN_sin_AS <- mean(t100$AS_TpmRNA_mean)
TPM_solo_AS_con_S <- mean(solo_AS_con_S$AS_TpmRNA_mean.x)
TPM_solo_S_con_AS <- mean(solo_S_con_AS$AS_TpmRNA_mean.y)

# One-row data frame relating the mean expression of each group.
medias_data_frame <- data.frame(
  "Gen Sin AS" = TPM_mean_GEN_sin_AS,
  "AS con S" = TPM_solo_AS_con_S,
  "S con AS" = TPM_solo_S_con_AS
)

# Median TPM of the same three groups.
TPM_median_GEN_sin_AS <- median(t100$AS_TpmRNA_mean)
TPM_median_solo_AS_con_S <- median(solo_AS_con_S$AS_TpmRNA_mean.x)
TPM_median_solo_S_con_AS <- median(solo_S_con_AS$AS_TpmRNA_mean.y)

# NOTE(review): the `Mean` column holds the medians computed above; the name
# is kept as-is because code outside this chunk may refer to it.
median_data_frame <- data.frame(
  Type = c("Gen_sin_AS", "AS_con_S", "S_con_AS"),
  Mean = c(
    TPM_median_GEN_sin_AS,
    TPM_median_solo_AS_con_S,
    TPM_median_solo_S_con_AS
  )
)

# Horizontal bars comparing the AS and S medians.
barplot(
  c(TPM_median_solo_AS_con_S, TPM_median_solo_S_con_AS),
  names.arg = c("AS", "S"),
  main = "Mediana de TPM de expresión",
  xlab = "TPM",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8,
  xlim = c(0, 7.5)
)

# Two-sample Wilcoxon rank-sum test on the per-isoform mean TPM values.
wilcox.test(solo_AS_con_S$AS_TpmRNA_mean.x, solo_S_con_AS$AS_TpmRNA_mean.y)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  solo_AS_con_S$AS_TpmRNA_mean.x and solo_S_con_AS$AS_TpmRNA_mean.y
## W = 7260.5, p-value = 5.32e-09
## alternative hypothesis: true location shift is not equal to 0
# Frequency tables: `X7` values among the AS rows and `X8` values among the
# S rows.
isoform_median_solo_AS_con_S <- table(solo_AS_con_S$X7)
isoform_median_solo_S_con_AS <- table(solo_S_con_AS$X8)

# Horizontal bars comparing the median count of each table. The x-axis label
# is written with a correctly encoded accented character so that it renders
# as intended in the figure.
barplot(
  c(median(isoform_median_solo_AS_con_S), median(isoform_median_solo_S_con_AS)),
  names.arg = c("AS", "S"),
  main = "Mediana de transcritos",
  xlab = "Número de transcritos",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8,
  xlim = c(0, 1.1)
)

# Medians plotted below: genes without AS (`t100`) versus S isoforms that
# have an AS partner (`solo_S_con_AS`). Both values used by the plot are
# computed here so the chunk does not depend on an earlier assignment.
TPM_median_GEN_sin_AS <- median(t100$AS_TpmRNA_mean)
TPM_median_solo_S_con_AS <- median(solo_S_con_AS$AS_TpmRNA_mean.y)

barplot(
  c(TPM_median_GEN_sin_AS, TPM_median_solo_S_con_AS),
  names.arg = c("No", "Si"),
  main = "Mediana de TPM de expresión",
  xlab = "TPM",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8,
  xlim = c(0, 7.5)
)

# Two-sample Wilcoxon rank-sum test on the same two groups.
wilcox.test(t100$AS_TpmRNA_mean, solo_S_con_AS$AS_TpmRNA_mean.y)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  t100$AS_TpmRNA_mean and solo_S_con_AS$AS_TpmRNA_mean.y
## W = 96125, p-value = 0.4894
## alternative hypothesis: true location shift is not equal to 0
# Frequency tables: `X6` values in `t100` and `X8` values in `solo_AS_con_S`.
# The TPM medians are not recomputed here: this plot does not use them and
# they already hold the same values from the expression summary chunk.
isoform_gen_sin_As <- table(t100$X6)
isoform_gen_con_AS <- table(solo_AS_con_S$X8)

# Horizontal bars comparing the median count of each table; the x-axis label
# uses a correctly encoded accented character.
barplot(
  c(median(isoform_gen_sin_As), median(isoform_gen_con_AS)),
  names.arg = c("No", "Si"),
  main = "Mediana de transcritos",
  xlab = "Número de transcritos",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8,
  xlim = c(0, 1.1)
)

4.2 AS sin S

# Working copy of the AS expression matrix; everything from the first
# underscore to the end of each ID is stripped.
AS_matriz <- exprMatrix_AS
AS_matriz$ids <- gsub("_.*$", "", AS_matriz$ids)

# Columns 1 and 3 of the lookup table; column 1 must be `X1`, the join key.
id_gen_noAS_noexprx <- id_AS_name_no_expr_sent[, c(1, 3)]

# Keep isoforms whose gene appears in `ID_AS` and also in the lookup table.
isoforma_id_nombre <- isoforma_y_gen_AS %>%
  dplyr::inner_join(ID_AS, by = c("associated_gene" = "X1")) %>%
  dplyr::inner_join(id_gen_noAS_noexprx, by = c("associated_gene" = "X1"))

# Columns 1 and 4 of the AS isoform table; column 1 must be `isoform`, the
# key of the join below.
isoform_TPM_medio <- isoforma_y_media_AS[, c(1, 4)]

# One row per isoform, then attach the columns selected above.
uniq_matriz_AS_no_gene_id <- distinct(
  isoforma_id_nombre, isoform,
  .keep_all = TRUE
)
tabla_AS_no_S_tpm_medio <- uniq_matriz_AS_no_gene_id %>%
  dplyr::inner_join(isoform_TPM_medio, by = "isoform")
datatable(tabla_AS_no_S_tpm_medio)

# Column subsets of the joined table.
sin_gen_S <- tabla_AS_no_S_tpm_medio[, c(1, 2, 3, 5)]
AS_S_no_S <- tabla_AS_no_S_tpm_medio[, 3:4]

# Median mean-TPM: AS without S ("No") versus AS with S ("Si").
barplot(
  c(
    median(sin_gen_S$AS_TpmRNA_mean),
    median(solo_AS_con_S$AS_TpmRNA_mean.x)
  ),
  names.arg = c("No", "Si"),
  main = "Mediana de TPM de expresión",
  xlab = "TPM",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8,
  xlim = c(0, 3.5)
)

# Two-sample Wilcoxon rank-sum test on the same two groups.
wilcox.test(sin_gen_S$AS_TpmRNA_mean, solo_AS_con_S$AS_TpmRNA_mean.x)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  sin_gen_S$AS_TpmRNA_mean and solo_AS_con_S$AS_TpmRNA_mean.x
## W = 3696.5, p-value = 0.4903
## alternative hypothesis: true location shift is not equal to 0
# Frequency of each `X7` value among AS isoforms without S (`sin_gen_S`) and
# among AS isoforms with S (`solo_AS_con_S`).
transcritos_AS_no_S <- table(sin_gen_S$X7)
transcritos_AS_si_AS <- table(solo_AS_con_S$X7)

# Horizontal bars comparing the median count of each table; the x-axis label
# uses a correctly encoded accented character.
barplot(
  c(median(transcritos_AS_no_S), median(transcritos_AS_si_AS)),
  names.arg = c("No", "Si"),
  main = "Mediana de transcritos",
  xlab = "Número de transcritos",
  col = c("lightcoral", "lightblue"),
  horiz = TRUE,
  las = 1,
  cex.names = 0.9,
  cex.axis = 0.8,
  xlim = c(0, 1.1)
)

# Two-sample Wilcoxon rank-sum test on the two sets of counts.
wilcox.test(transcritos_AS_no_S, transcritos_AS_si_AS)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  transcritos_AS_no_S and transcritos_AS_si_AS
## W = 2409, p-value = 0.2465
## alternative hypothesis: true location shift is not equal to 0